
############################################################################
# Conditional logistic regression analysis of shadow rapporteur appointments
############################################################################

# Programme:  shdw.analysis07d-final.r
# Project:    EP shadow rapporteur selection
# Author:		  Frank Haege, Department of Politics and Administration, University of Limerick
# Contact:		frank.haege@ul.ie

# Description
#############
# This script estimates conditional logistic regression models of shadow rapporteur selection
# for the pooled sample and for individual party groups as presented in Table A3 of the online appendix.
# It replicates the conditional regression analysis presented in Table 2 of the article, predicting 
# shadow rapporteurship appointments in 2013/2014 with a policy interest similarity variable 
# that is based only on MEPs' involvement in reports between 2009 and 2012.
# It also produces a plot of the predicted values over the range of the similarity in policy interests
# variable, holding the other explanatory variables constant.


# Remove all objects
rm(list = ls(all = TRUE))

# Load libraries
library(ggplot2)
library(ggpubr)
library(texreg)
library(survival)
library(data.table)

# Change default setting
options(stringsAsFactors = FALSE)
theme_set(theme_classic())

# Set working directory
setwd("C:\\Users\\Frank\\Dropbox\\European Union\\EP shadow rapporteurs\\EUP submission")

# Load data
load("Data analysis\\shdw-management05-clean.RData")

# Limit sample to 2013 and 2014
df = df[year(df$doc.date)>=2013, ]


# Table 1: Conditional logistic regression analysis of shadow rapporteur appointments
#####################################################################################

# Pooled model
res1 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df)

# GUE-NGL
res2 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="GUE-NGL", ])

# Greens
res3 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="GREENS", ])

# S&D
res4 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="S&D", ])

# ALDE
res5 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="ALDE", ])

# EPP
res6 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="EPP", ])

# ECR
res7 = clogit(shadow ~ 
                joint.country + 
                ccc.pre2013 +
                term.no +
                diff.median.leftright +
                diff.median.eu.position +
                diff.median.galtan +
                diff.rapp.leftright + 
                diff.rapp.eu.position + 
                diff.rapp.galtan + 
                com.leader + 
                com.sub + 
                log.npd.share + 
                epg.leader +
                absence + 
                strata(rep.epg), data=df[df$epg.accro=="ECR", ])


# Print results with regression coefficients to screen
screenreg(list(res1, res2, res3, res4, res5, res6, res7), omit.coef = "(com.accro)|(epg.accro)",
          custom.model.names=c("Full Sample", levels(df$epg.accro)[1:6]), 
          include.rsquared = F, include.maxrs = F)

# Print results with odds ratios to screen
screenreg(list(res1, res2, res3, res4, res5, res6, res7),
          custom.model.names=c("Full Sample", levels(df$epg.accro)[1:6]),
          override.coef=list(exp(coef(res1)), exp(coef(res2)), exp(coef(res3)),
                             exp(coef(res4)), exp(coef(res5)), exp(coef(res6)),
                             exp(coef(res7))),
          override.se=list(exp(coef(res1))*sqrt(diag(res1$var)), 
                           exp(coef(res2))*sqrt(diag(res2$var)), 
                           exp(coef(res3))*sqrt(diag(res3$var)),
                           exp(coef(res4))*sqrt(diag(res4$var)), 
                           exp(coef(res5))*sqrt(diag(res5$var)), 
                           exp(coef(res6))*sqrt(diag(res6$var)),
                           exp(coef(res7))*sqrt(diag(res7$var))), 
          include.rsquared = F, include.maxrs = F)

# Export table
htmlreg(list(res1, res2, res3, res4, res5, res6, res7), 
        custom.coef.map = list(
          "diff.median.leftright"="Left-Right",
          "diff.median.eu.position"="EU Support",
          "diff.median.galtan"="GAL/TAN",
          "diff.rapp.leftright"="Rapporteur Left-Right",
          "diff.rapp.eu.position"="Rapporteur EU Support",
          "diff.rapp.galtan"="Rapporteur GAL/TAN",        
          "joint.country"="Similar National Salience",
          "ccc.pre2013"="Similar Policy Interests",
          "term.no"="Seniority",
          "com.leader"="Committee Leader",
          "log.npd.share"="National Delegation Size",
          "com.sub"="Substitute Member",
          "epg.leader"="Party Group Leader",
          "absence"="Vote Absenteeism"),
        digits=2,
        override.coef=list(exp(coef(res1)), exp(coef(res2)), exp(coef(res3)),
                           exp(coef(res4)), exp(coef(res5)), exp(coef(res6)),
                           exp(coef(res7))),
        override.se=list(exp(coef(res1))*sqrt(diag(res1$var)), 
                         exp(coef(res2))*sqrt(diag(res2$var)), 
                         exp(coef(res3))*sqrt(diag(res3$var)),
                         exp(coef(res4))*sqrt(diag(res4$var)), 
                         exp(coef(res5))*sqrt(diag(res5$var)), 
                         exp(coef(res6))*sqrt(diag(res6$var)),
                         exp(coef(res7))*sqrt(diag(res7$var))),
        custom.model.names=c("Full Sample", "GUE-NGL", "Greens", "S&D", "ALDE", "EPP", "ECR"), 
        include.rsquared = F, include.maxrs = F,
        file="Data analysis\\Tables\\shdw-analysis01d-final-pre2013.doc")


# Figure 1: Effect of similar policy interests on pobability of shadow rapporteurship
#####################################################################################

# Estimate pooled conditional logit model
res = clogit(shadow ~ 
                 joint.country + 
                 ccc.pre2013 +
                 term.no + 
                 diff.median.leftright +
                 diff.median.eu.position +
                 diff.median.galtan +
                 diff.rapp.leftright + 
                 diff.rapp.eu.position + 
                 diff.rapp.galtan + 
                 com.leader + 
                 com.sub + 
                 log.npd.share + 
                 epg.leader +
                 absence + 
                 strata(rep.epg), data=df)
summary(res)

# Create new data for prediction
# Sets all other quantitative variables to their mean and dummy variables to zero
# The strata is set to S&D and A7-0151/2013: proposal for a regulation of the European Parliament and of the Council 
# amending Regulation (EC) No 443/2009 to define the modalities for reaching the 2020 target to reduce CO2 emissions 
# from new passenger cars
newdata = data.frame(joint.country = 0,
                       ccc.pre2013 = seq(from=-9, to=74, length.out=100),
                       term.no = mean(df$term.no, na.rm=T),
                       diff.median.leftright = mean(df$diff.median.leftright, na.rm=T),
                       diff.median.eu.position = mean(df$diff.median.eu.position, na.rm=T),
                       diff.median.galtan = mean(df$diff.median.galtan, na.rm=T),
                       diff.rapp.leftright = mean(df$diff.rapp.leftright, na.rm=T),
                       diff.rapp.eu.position = mean(df$diff.rapp.eu.position, na.rm=T),
                       diff.rapp.galtan = mean(df$diff.rapp.galtan, na.rm=T),        
                       com.leader = mean(df$com.leader, na.rm=T),
                       log.npd.share = mean(df$log.npd.share, na.rm=T),
                       com.sub = 0,
                       epg.leader = 0,
                       absence = mean(df$absence, na.rm=T),
                       rep.epg = "A7-0151/2013.ENVI.S&D")

# Predicted values 
# (see https://markmail.org/search/?q=list%3Aorg.r-project.r-help+predict+clogit#query:list%3Aorg.r-project.r-help%20predict%20clogit%20from%3A%22Therneau%2C%20Terry%20M.%2C%20Ph.D.%22+page:1+mid:tsbl3cbnxywkafv6+state:results)
lin.pred = predict(res, newdata, type="lp", se.fit=T)

# Create confidence interval
upr = lin.pred$fit + (1.96 * lin.pred$se.fit)
lwr = lin.pred$fit - (1.96 * lin.pred$se.fit)
fit = lin.pred$fit

# Transform predicted values and confidence intervals using link function
newdata$fit = (exp(fit)/(1+exp(fit)))*100
newdata$upr = (exp(lwr)/(1+exp(lwr)))*100
newdata$lwr = (exp(upr)/(1+exp(upr)))*100
rm(fit, lwr, upr, lin.pred)

# Create data for rug plot
hist(df$ccc.pre2013, probability = T, breaks=100)
tail(df$ccc.pre2013[order(df$ccc.pre2013)], 20)
cat = hist(df$ccc.pre2013, probability = T, breaks=100)
h = data.frame("freq"=cat$counts, "prop"=cat$density*100, "mids"=cat$mids, "y"=0)
rm(cat)

# Plot effect of similarity of policy interests on probability of shadow rapporteur selection
p = ggplot() + 
  geom_line(data=newdata, mapping=aes(x=ccc.pre2013, y=fit), col="black", size=1) +
  geom_segment(data=h, size=1.1, show.legend=FALSE,
               aes(x=mids, xend=mids, y=y, yend=prop), colour="black", alpha=0.6) +
  geom_ribbon(data=newdata,aes(x=ccc.pre2013, ymin=lwr,ymax=upr),alpha=0.1) +
  ylab("Probability of selection (%)") +
  ylim(0, 100) +
  xlab("Similarity of policy interests") +
  theme_classic(base_size=20)
p

# Export figure
ggexport(p, filename="Data analysis\\Figures\\shdw-analysis01d-final-pre2013.png", width=500, height=500)